---
title: "Analysis: Accuracy Discernment"
output: html_document
date: "2025-03-20"
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# Description

The code below creates the accuracy discernment figure ('Partisan Differences in Accuracy Discernment') in the paper.

# Setup 

```{r}

# clearing memory ------------------------------------------------------------
rm(list = ls())

# load packages ----------------------------------------------------------------
library(here)
library(tidyverse)
library(ggplot2)
library(survey)
library(srvyr)
library(ggeffects)
library(stargazer)
library(broom)
library(ggstance)
library(ggpubr)
library(arm)
library(gridExtra)
library(doParallel)
library(foreach)
library(broom)
library(margins)
library(patchwork)
library(relimp, pos = 4)
library(paran)
library(clarify)
library(viridis)
library(stringr)


# useful functions -------------------------------------------------------------

# function to calculate standard error of mean
std_error <- function(var){
  sd(var, na.rm = T) / sqrt(length(var[!is.na(var)]))
}

# function to manually bin continous variables
bin_fun <- function(x, bin_size){
  bin_size * ceiling(x / bin_size) - bin_size / 2
}

# function to put variables on a 0-1 scale
rescale_01 <- function(x, max){
  (x-1)/(max-1)
}

# function to compute two-way clustered standard errors with survey weights
source(here("code", "helper functions", "glmweave.R"))

# set n_sim
n_sim <- 10000 # higher n_sim to get more precise p values

# set seed
set.seed(4787)

# plotting colors
cb_black <- "#000000" # Discernment / all headlines
cb_blue <- "#0072B2"  # Democratic headlines
cb_red<- "#D55E00"    # Republican headlines
cb_green = "#009E73"  # True headlines
cb_orange = "#E69F00" # False headlines

```

# Import Data

```{r}

# mturk pre-test survey data (MTurk sample)
mturk <- read.csv(here("data", "pretest", "Feb_2020_Pretest_New_Stimuli_W20_MTurk.csv"))

# key that matches different headline IDs together
key <- read_xlsx(here("data", "pretest", "feb_20_qualtrics_ids.xlsx"))

# yougov's key that matches different headline IDs together
request <- read.csv(here("data", "pretest", "guay_data_request_2024.csv"))

```

# Clean MTurk Data

```{r}

# rename variables
names(mturk) <- str_replace(names(mturk), "f_fav", "partisan")
names(mturk) <- str_replace(names(mturk), "f_true", "true")

# create vectors of variable names
partisan_vars <- colnames(mturk)[str_detect(colnames(mturk), "partisan")]
true_vars <- colnames(mturk)[str_detect(colnames(mturk), "true")]

# rater party
mturk$respondent_party_bg <- ifelse(mturk$DemRep_C < 4, "dem", "rep")

# clean mturk data
mturk_cleaned <- 
  mturk %>%
  # only include respondents who passed the two attention checks
  filter(Random != 1,  
         Google != 1) %>%
  dplyr::select(
    caseid = V1, 
    party = respondent_party_bg,
    all_of(true_vars), 
    all_of(partisan_vars)
  ) %>%
  gather(key, rating, -caseid, -party) %>%
  separate(key, into = c("characteristic", "hdl_number"), sep = "_") 

# merge 'file name' from key into mturk_cleaned ---------------------------------

mturk_cleaned_2 <- 
  merge(mturk_cleaned,
        key %>% dplyr::select(file_name = headline, hdl_number = `Item#`), 
        by = "hdl_number")

dim(mturk_cleaned)
dim(mturk_cleaned_2)

# merge yougov hdl_id from request into mturk_cleaned -------------------------------

mturk_cleaned_3 <- 
  merge(mturk_cleaned_2,
        request %>% dplyr::select(hdl_id = varname, file_name) %>%
          mutate(file_name_tmp = str_replace(file_name, " f_", "f_")) %>%
          mutate(file_name = str_replace(file_name_tmp, " r_", "r_"))) %>%
  dplyr::select(-file_name_tmp) %>%
  mutate(rating_01 = rescale_01(rating, max = 6)) %>%
  mutate(hdl_true = ifelse(str_detect(hdl_id, "REAL"), 1, 0), 
         hdl_party = ifelse(str_detect(hdl_id, "DEM"), "dem", "rep")) %>%
  mutate(hdl_agreeable = ifelse(hdl_party == party, 1, 0))
  

dim(mturk_cleaned_2)
dim(mturk_cleaned_3)

# verify that the mturk data matches the excel sheet containing summary stats used elsewhere in the project --------

# import edited summary sheet
summary_sheet <- read_xlsx(here("data", 
                                "pretest", 
                                "Pretest6 results_bg_edited.xlsx"))

# format table from summary sheet
summary_tbl_final <- 
  summary_sheet %>%
  dplyr::select(file_name = Headline, 
                partisan_demrater = partisan_dem_rater, 
                partisan_reprater = partisan_rep_rater, 
                true_demrater = true_dem, 
                true_reprater = true_rep) %>%
  gather(key, summary_mean, -file_name) %>%
  separate(key, into = c("characteristic", "party")) %>%
  mutate(party = str_remove(party, "rater")) 

# make mturk data comparable to summary_tbl
mturk_tbl_final <- 
  mturk_cleaned_3%>%
  mutate(hdl_true = ifelse(str_detect(hdl_id, "REAL"), 1, 0), 
         hdl_party = ifelse(str_detect(hdl_id, "DEM"), "dem", 
                            ifelse(str_detect(hdl_id, "REP"), "rep", NA))) %>%
  group_by(file_name, characteristic, party) %>%
  summarize(mturk_mean = mean(rating, na.rm=T)) %>%
  na.omit()

merged <- 
  merge(mturk_tbl_final, summary_tbl_final, by = c("file_name", "characteristic", "party"))

dim(mturk_tbl_final)
dim(summary_tbl_final)
dim(merged)

# vector of 'file_names' (headline image file names) used in YouGov survey
file_name_used_0 <- str_remove(unique(request$file_name), " ")
file_name_used <- file_name_used_0[!str_detect(file_name_used_0, "c_")]

merged_short <- merged[merged$file_name %in% file_name_used,]
dim(merged_short)

# excel sheet containing summary stats matches raw Qualtrics data -------------------------------
with(merged_short, cor(mturk_mean, summary_mean, use = "complete.obs"))

```

# Belief Discernment Figure 

```{R}

fig_accuracy_agreeable <- 
  
mturk_cleaned_3 %>%
  filter(characteristic == "true") %>%
  group_by(party, hdl_true, hdl_agreeable) %>%
  summarize(mean = mean(rating_01, na.rm=T), 
            se = std_error(rating_01)) %>%
  
  mutate(hdl_true = ifelse(hdl_true == 1, "True", "False")) %>%

  filter(hdl_agreeable == 1) %>%

  ggplot(aes(x = factor(hdl_true), 
             y = mean, 
             ymin = mean - 1.96*se, 
             ymax = mean + 1.96*se,
             label = round(mean, 2),
             fill = factor(party))) +
  geom_bar(stat = "identity", position = position_dodge()) + 
  geom_errorbar(size = .3, width = .2) + 
  geom_text(aes(y = mean + 1.96*se), 
            nudge_y = .02, size = 3) + 
  facet_wrap(vars(party)) + 
  theme_bw() + 
  scale_fill_manual(breaks = c("dem", "rep"), 
                    labels = c("Democrats", "Republicans"), 
                    values = c(cb_blue, cb_red), 
                    name = "") + 
  xlab("Headline Veracity") + 
  ylab("Mean Accuracy Rating (0-1)") + 
  ggtitle("Agreeable Headlines") + 
  theme(legend.position = "none") + 
  xlab("") + 
  ylim(c(0, .65))

fig_accuracy_disagreeable <- 

mturk_cleaned_3 %>%
  filter(characteristic == "true") %>%
  group_by(party, hdl_true, hdl_agreeable) %>%
  summarize(mean = mean(rating_01, na.rm=T), 
            se = std_error(rating_01)) %>%
  
  mutate(hdl_true = ifelse(hdl_true == 1, "True", "False")) %>%
  
  filter(hdl_agreeable == 0) %>%
  
  ggplot(aes(x = factor(hdl_true), 
             y = mean, 
             ymin = mean - 1.96*se, 
             ymax = mean + 1.96*se,
             label = round(mean, 2),
             fill = factor(party))) +
  geom_bar(stat = "identity", position = position_dodge()) + 
  geom_errorbar(size = .3, width = .2) + 
  geom_text(aes(y = mean + 1.96*se), 
            nudge_y = .02, size = 3) + 
  facet_wrap(vars(party)) + 
  theme_bw() + 
  scale_fill_manual(breaks = c("dem", "rep"), 
                    labels = c("Democrats", "Republicans"), 
                    values = c(cb_blue, cb_red), 
                    name = "") + 
  xlab("Headline Veracity") + 
  ylab("Mean Accuracy Rating (0-1)") + 
  ggtitle("Disagreeable Headlines") + 
  theme(legend.position = "none") + 
  xlab("") + 
  ylim(c(0, .65))

ggsave(fig_accuracy_agreeable + fig_accuracy_disagreeable + 
         plot_annotation(tag_levels = 'A'), 
       width = 8, height = 4, filename = here("figures", "main", "fig_accuracy.pdf"))

```

# Statistical Significance Tests

```{r}

# test whether differences in figure are statistically significant ---------------------------------

mturk_cleaned_3$party_dem <- ifelse(mturk_cleaned_3$party == "dem", 1, 0)

## Agreeable Headlines (m2nc)

m2_data <- 
  mturk_cleaned_3 %>%
  filter(characteristic == "true") %>%
  dplyr::select(caseid, party_dem, hdl_id,
                rating_01, hdl_true, hdl_agreeable) %>%
  filter(hdl_agreeable == 1) %>%
  na.omit()

m2_all <-  
  glm(rating_01 ~ hdl_true*party_dem,
      family = "gaussian", data = m2_data)

m2_true <-  
  glm(rating_01 ~ party_dem,
      family = "gaussian", data = m2_data %>% filter(hdl_true == 1))

m2_false <-  
  glm(rating_01 ~ party_dem,
      family = "gaussian", data = m2_data %>% filter(hdl_true == 0))

m2_sim <- 
  clarify::sim(m2_all, n = n_sim, vcov = xeffect.glm(glm.obj = m2_all, g1 = m2_all$data$caseid, g2 = m2_all$data$hdl_id))

m2_sim_apply <- 
  sim_apply(m2_sim, cl = 6,
            function(fit){
              true_right <- predict(fit, newdata = m2_data %>% mutate(hdl_true = 1, party_dem = 0), type = "response")
              true_left <- predict(fit, newdata = m2_data %>% mutate(hdl_true = 1, party_dem = 1), type = "response")
              false_right <- predict(fit, newdata = m2_data %>% mutate(hdl_true = 0, party_dem = 0), type = "response")
              false_left <- predict(fit, newdata = m2_data %>% mutate(hdl_true = 0, party_dem = 1), type = "response")
              
              c(true_right = mean(true_right), true_left = mean(true_left), 
                false_right = mean(false_right), false_left = mean(false_left))})

m2_transformed <-
  transform(m2_sim_apply, 
          #  `true_right - false_right` = true_right - false_right,
          #  `true_left - false_left` = true_left - false_left,
            `true_right / false_right` = true_right / false_right,
            `true_left / false_left` = true_left / false_left) %>%
  transform(#`diff` = `true_left - false_left` - `true_right - false_right` , 
            `ratio` = `true_left / false_left` / `true_right / false_right`)


## Disagreeable Headlines (m3nc)

m3_data <- 
  mturk_cleaned_3 %>%
  filter(characteristic == "true") %>%
  dplyr::select(caseid, party_dem, hdl_id,
                rating_01, hdl_true, hdl_agreeable) %>%
  filter(hdl_agreeable == 0) %>%
  na.omit()

m3_all <-  
  glm(rating_01 ~ hdl_true*party_dem,
      family = "gaussian", data = m3_data)

m3_true <-  
  glm(rating_01 ~ party_dem,
      family = "gaussian", data = m3_data %>% filter(hdl_true == 1))

m3_false <-  
  glm(rating_01 ~ party_dem,
      family = "gaussian", data = m3_data %>% filter(hdl_true == 0))

m3_sim <- 
  clarify::sim(m3_all, n = n_sim, vcov = xeffect.glm(glm.obj = m3_all, g1 = m3_all$data$caseid, g2 = m3_all$data$hdl_id))

m3_sim_apply <- 
  sim_apply(m3_sim, cl = 6,
            function(fit){
              true_right <- predict(fit, newdata = m3_data %>% mutate(hdl_true = 1, party_dem = 0), type = "response")
              true_left <- predict(fit, newdata = m3_data %>% mutate(hdl_true = 1, party_dem = 1), type = "response")
              false_right <- predict(fit, newdata = m3_data %>% mutate(hdl_true = 0, party_dem = 0), type = "response")
              false_left <- predict(fit, newdata = m3_data %>% mutate(hdl_true = 0, party_dem = 1), type = "response")
              
              c(true_right = mean(true_right), true_left = mean(true_left), 
                false_right = mean(false_right), false_left = mean(false_left))})

m3_transformed <-
  transform(m3_sim_apply, 
            `true_right - false_right` = true_right - false_right,
            `true_left - false_left` = true_left - false_left,
            `true_right / false_right` = true_right / false_right,
            `true_left / false_left` = true_left / false_left) %>%
  transform(`diff` = `true_left - false_left` - `true_right - false_right` , 
            `ratio` = `true_left / false_left` / `true_right / false_right`)


# results -----------------------------------------------------------------------------------------

# agreeable
summary(m2_transformed) %>% tail(7)
summary(m2_transformed, parm = "ratio", null = 1)

# disagreeeable
summary(m3_transformed) %>% tail(14)
summary(m3_transformed, parm = "ratio", null = 1)

```
